PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\price_changes_luggage_July2025.csv"
OUT=price_changes_sellertype
DBMS=csv REPLACE;
run;
PROC IMPORT 
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\Sellerclusters_luggage_July2025.csv"
out = SELLER_TYPE_TREND1
DBMS=csv REPLACE;
run;
PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\pricechangetrend_luggage_July2025.csv" 
out=final_outputvector
DBMS=csv REPLACE;
run;

/***subsetting for the relevant analysis SKUs***/

data relevantSKUsonly;
set price_changes_sellertype;
where ASIN in ('B009R1FEQ8', 'B073ZG6HXV', 'B001DWK4NM','B00144MBWQ','B004HKSENY') ; 
run;


data relevantSKUs;
set price_changes_sellertype;
where ASIN in ('B009R1FEQ8', 'B073ZG6HXV', 'B001DWK4NM','B00144MBWQ','B004HKSENY') and seller_type in ('Amazon New', 'Cluster 1', 'Cluster 2'); 
run;

data relevantSKUs_used;
set price_changes_sellertype;
where ASIN in ('B009R1FEQ8', 'B073ZG6HXV', 'B001DWK4NM','B00144MBWQ','B004HKSENY') and seller_type not in ('Amazon New', 'Cluster 1', 'Cluster 2'); 
run;



PROC SQL;
CREATE TABLE numsellers_newoffers AS
SELECT
TIME1,
ASIN,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUs
group by
time1,
ASIN;
quit;

PROC SQL;
CREATE TABLE numsellersbyeachtype_newoffers AS
SELECT
TIME1,
ASIN,
seller_type,
COUNT(DISTINCT SELLER_ID) AS NUM_SELLERS
from 
relevantSKUsonly
group by
time1,
ASIN,
seller_type;
quit;

/**creating num sellers DV**/
proc sort data = numsellers_newoffers (keep=time1) nodupkey out= final_numsellers_outputvector;
by time1;
run;

%macro numsellers(b,c);
data numsellers&b. (keep = time1 NUM_SELLERS_&b.);
set numsellers_newoffers;
where ASIN = &c.;
rename NUM_SELLERS = NUM_SELLERS_&b. ;
run;

data final_numsellers_outputvector ;
merge final_numsellers_outputvector (in=a) numsellers&b. (in=b);
by time1;
if a ;
if a and not b then NUM_SELLERS_&b. = 0;
run;
%mend;
%numsellers(b = SAM, c = 'B009R1FEQ8'); 
/*%numsellers(b = DEL, c = 'B073ZG6HXV'); */
%numsellers(b = TC, c = 'B001DWK4NM'); 
%numsellers(b = OLY, c = 'B00144MBWQ'); 
%numsellers(b = RL, c = 'B004HKSENY'); 
proc print data = final_numsellers_outputvector (obs=10);
run;

PROC EXPORT 
DATA=final_numsellers_outputvector
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\Data Scraped\Amazon Others_merged\Cluster Analysis\numsellers_dv2outcomeluggage_July2025.csv"
REPLACE;
run;

/***covariate creation**/

proc sort data = SELLER_TYPE_TREND1 out = SELLER_TYPE_TREND1;
by time1;
run;
data SELLER_TYPE_TREND2;
set SELLER_TYPE_TREND1;
if seller_type in ('3P Old Focal', '3P Old Non-Focal') then seller_type = '3P Old';
else if seller_type in ('Amazon Old Focal', 'Amazon Old Non-Focal') then seller_type = 'Amazon Old';
else seller_type = seller_type;
run;

/**seller characteristics trend***/
proc freq data = SELLER_TYPE_TREND2;
tables seller_type;
run;

/*subset for 3p new sellers*/
data SELLER_TYPE_TREND3;
set SELLER_TYPE_TREND2;
where seller_type not in ('Amazon Ne', '3P Old', 'Amazon Ol');
run;

proc freq data = SELLER_TYPE_TREND3;
tables seller_type;
run;
PROC SQL;
CREATE TABLE SELLER_charcs_byasin AS
SELECT
TIME1,
ASIN,
SELLER_TYPE,
mean(FBA) as mean_FBA,
/*std(FBA) as sd_FBA,*/
mean(Freeship) as mean_freeship,
/*std(Freeship) as sd_freeship,*/
mean(Prime) as mean_prime,
/*std(Prime) as sd_prime,*/
sum(condition) as new_offers,
mean(num_seller_Posrating) as mean_posrating,
/*std(num_seller_Posrating) as sd_posrating,*/
mean(num_seller_ratings) as mean_numratings,
/*std(num_seller_ratings) as sd_numratings,*/
mean(Seller_Rating) as mean_starrate
/*std(Seller_Rating) as sd_starrate*/
from 
SELLER_TYPE_TREND3
group by
time1,
ASIN,
seller_type;
quit;

/*for std= . recoding those to 0*/
/*data SELLER_charcs_byasin1;*/
/*set SELLER_charcs_byasin;*/
/*if sd_FBA = . then sd_FBA = 0;*/
/*if sd_freeship = . then sd_freeship = 0;*/
/*if sd_prime = . then sd_prime = 0;*/
/*if sd_posrating = . then sd_posrating =0;*/
/*if sd_numratings = . then sd_numratings =0;*/
/*if sd_starrate = . then sd_starrate =0;*/
/*run;*/

/**converting the data by time, one row per date**/

proc contents data = SELLER_charcs_byasin1;
run;

proc sort data = SELLER_charcs_byasin (keep=time1) nodupkey out= SELLER_charcs_byasin2;
by time1;
run;


%macro sellerchar(b,c);
data SELLER_charcs_byasin&b. (keep = time1 mean_FBA&b. 
mean_freeship&b. mean_numratings&b.   
mean_posrating&b. 
mean_prime&b.   
mean_starrate&b.
new_offers&b.);
set SELLER_charcs_byasin;
where ASIN = &c. ;
rename mean_FBA = mean_FBA&b.;
rename mean_freeship = mean_freeship&b.;  
rename mean_numratings = mean_numratings&b.;    
rename mean_posrating =  mean_posrating&b.;    
rename mean_prime =  mean_prime&b.;    
rename mean_starrate = mean_starrate&b.;
rename new_offers = new_offers&b.;
run;

data SELLER_charcs_byasin2 ;
merge SELLER_charcs_byasin2 (in=a) SELLER_charcs_byasin&b. (in=b);
by time1;
if a ;
if a and not b then do;
mean_FBA&b. = 0;
mean_freeship&b. = 0;
mean_posrating&b. = 0;
mean_prime&b. =0;
mean_starrate&b. = 0;
new_offers&b.=0;
end;
run;
%mend;
%sellerchar(b = SAM_3p, c = 'B009R1FEQ8'); 
/*%sellerchar(b = DEL_3p, c = 'B073ZG6HXV');  */
%sellerchar(b = TC_3p, c = 'B001DWK4NM'); 
%sellerchar(b = OLY_3p, c = 'B00144MBWQ'); 
%sellerchar(b = RL_3p, c = 'B004HKSENY'); 


/**coding all missings with 0 values**/

proc contents data = SELLER_charcs_byasin2;
run;

data seller_characs_byasin3;
set SELLER_charcs_byasin2;
if	mean_FBAOLY_3p	=. Then	mean_FBAOLY_3p	=0;
if	mean_FBARL_3p	=. Then	mean_FBARL_3p	=0;
if	mean_FBASAM_3p	=. Then	mean_FBASAM_3p	=0;
if	mean_FBATC_3p	=. Then	mean_FBATC_3p	=0;
if	mean_freeshipOLY_3p	=. Then	mean_freeshipOLY_3p	=0;
if	mean_freeshipRL_3p	=. Then	mean_freeshipRL_3p	=0;
if	mean_freeshipSAM_3p	=. Then	mean_freeshipSAM_3p	=0;
if	mean_freeshipTC_3p	=. Then	mean_freeshipTC_3p	=0;
if	mean_numratingsOLY_3p	=. Then	mean_numratingsOLY_3p	=0;
if	mean_numratingsRL_3p	=. Then	mean_numratingsRL_3p	=0;
if	mean_numratingsSAM_3p	=. Then	mean_numratingsSAM_3p	=0;
if	mean_numratingsTC_3p	=. Then	mean_numratingsTC_3p	=0;
if	mean_posratingOLY_3p	=. Then	mean_posratingOLY_3p	=0;
if	mean_posratingRL_3p	=. Then	mean_posratingRL_3p	=0;
if	mean_posratingSAM_3p	=. Then	mean_posratingSAM_3p	=0;
if	mean_posratingTC_3p	=. Then	mean_posratingTC_3p	=0;
if	mean_primeOLY_3p	=. Then	mean_primeOLY_3p	=0;
if	mean_primeRL_3p	=. Then	mean_primeRL_3p	=0;
if	mean_primeSAM_3p	=. Then	mean_primeSAM_3p	=0;
if	mean_primeTC_3p	=. Then	mean_primeTC_3p	=0;
if	mean_starrateOLY_3p	=. Then	mean_starrateOLY_3p	=0;
if	mean_starrateRL_3p	=. Then	mean_starrateRL_3p	=0;
if	mean_starrateSAM_3p	=. Then	mean_starrateSAM_3p	=0;
if	mean_starrateTC_3p	=. Then	mean_starrateTC_3p	=0;
if	new_offersOLY_3p	=. Then	new_offersOLY_3p	=0;
if	new_offersRL_3p	=. Then	new_offersRL_3p	=0;
if	new_offersSAM_3p	=. Then	new_offersSAM_3p	=0;
if	new_offersTC_3p	=. Then	new_offersTC_3p	=0;

run;

proc sort data = seller_characs_byasin3 nodupkey out=seller_characs_byasin4 ;
by  TIME1 ;
run;

proc expand data=seller_characs_byasin4 out=seller_characs_byasin5 method = none; 
convert	mean_FBAOLY_3p	=	mean_FBAOLY_3p_lag	/transformout = (lag 1);
convert	mean_FBARL_3p	=	mean_FBARL_3p_lag	/transformout = (lag 1);
convert	mean_FBASAM_3p	=	mean_FBASAM_3p_lag	/transformout = (lag 1);
convert	mean_FBATC_3p	=	mean_FBATC_3p_lag	/transformout = (lag 1);
convert	mean_freeshipOLY_3p	=	mean_freeshipOLY_3p_lag	/transformout = (lag 1);
convert	mean_freeshipRL_3p	=	mean_freeshipRL_3p_lag	/transformout = (lag 1);
convert	mean_freeshipSAM_3p	=	mean_freeshipSAM_3p_lag	/transformout = (lag 1);
convert	mean_freeshipTC_3p	=	mean_freeshipTC_3p_lag	/transformout = (lag 1);
convert	mean_numratingsOLY_3p	=	mean_numratingsOLY_3p_lag	/transformout = (lag 1);
convert	mean_numratingsRL_3p	=	mean_numratingsRL_3p_lag	/transformout = (lag 1);
convert	mean_numratingsSAM_3p	=	mean_numratingsSAM_3p_lag	/transformout = (lag 1);
convert	mean_numratingsTC_3p	=	mean_numratingsTC_3p_lag	/transformout = (lag 1);
convert	mean_posratingOLY_3p	=	mean_posratingOLY_3p_lag	/transformout = (lag 1);
convert	mean_posratingRL_3p	=	mean_posratingRL_3p_lag	/transformout = (lag 1);
convert	mean_posratingSAM_3p	=	mean_posratingSAM_3p_lag	/transformout = (lag 1);
convert	mean_posratingTC_3p	=	mean_posratingTC_3p_lag	/transformout = (lag 1);
convert	mean_primeOLY_3p	=	mean_primeOLY_3p_lag	/transformout = (lag 1);
convert	mean_primeRL_3p	=	mean_primeRL_3p_lag	/transformout = (lag 1);
convert	mean_primeSAM_3p	=	mean_primeSAM_3p_lag	/transformout = (lag 1);
convert	mean_primeTC_3p	=	mean_primeTC_3p_lag	/transformout = (lag 1);
convert	mean_starrateOLY_3p	=	mean_starrateOLY_3p_lag	/transformout = (lag 1);
convert	mean_starrateRL_3p	=	mean_starrateRL_3p_lag	/transformout = (lag 1);
convert	mean_starrateSAM_3p	=	mean_starrateSAM_3p_lag	/transformout = (lag 1);
convert	mean_starrateTC_3p	=	mean_starrateTC_3p_lag	/transformout = (lag 1);
convert	new_offersOLY_3p	=	new_offersOLY_3p_lag	/transformout = (lag 1);
convert	new_offersRL_3p	=	new_offersRL_3p_lag	/transformout = (lag 1);
convert	new_offersSAM_3p	=	new_offersSAM_3p_lag	/transformout = (lag 1);
convert	new_offersTC_3p	=	new_offersTC_3p_lag	/transformout = (lag 1);
run;



/*****REMAINING COVARIATES ARE FROM THE ORIGINAL DV - FINAL DATA SET*****/

PROC IMPORT
DATAFILE="D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Luggage\final_gam_modeldata_JMRnd4_July2025.csv" 
out=final_gam_modeldata
DBMS=csv REPLACE;
run;



proc sort data = final_numsellers_outputvector out = sorted_finalnumsellersoutput;
by time1;
run;

proc sort data = final_gam_modeldata out = sorted_final_GAMMODELDATA;
by time1;
run;


proc sort data = seller_characs_byasin5 out = sorted_seller_characs_byasin5;
by time1;
run;


data finalnumsellers_modeldata;
merge sorted_finalnumsellersoutput (in=a) sorted_final_GAMMODELDATA (in=b) sorted_seller_characs_byasin5 (in=c);
by time1;
if a ;
run;




/**Adding special Amazon holidays***/

/*Valentine's day sale: Feb 5th - Feb 14th 

Easter day sale: March 19-March 26 (2018)

Mothers day: May 13 (2018)

Amazon prime day: JUly 16 (2018); post-prime day sale July 18 (2018)

Black Friday:
    Nov 17th - Nov 24th (2017)
    Nov 16th - Nov 23rd (2018)

Cyber Monday:
	Nov 27th (2017), Nov 26th (2018)

Amazon digital day: Dec 29 (2017), Dec 28 (2018)

12 days of deal sale: Dec 3rd - Dec 14 (2017), Dec 2nd - Dec 13 (2018)

Christmas day sale: Dec 20-Dec25

Hanukkah sale: Dec 1- Dec 7

Post Christmas sale: Dec 26- Jan 1st*/

data finalnumsellers_modeldata1;
set finalnumsellers_modeldata;
if time1 in ( '01Jan2018'd , 
              '05Feb2018'd, '06Feb2018'd,'07Feb2018'd,'08Feb2018'd,'09Feb2018'd,'10Feb2018'd,'11Feb2018'd,
              '12Feb2018'd, '13Feb2018'd,'14Feb2018'd,
			  '19Mar2018'd, '20Mar2018'd,'21Mar2018'd,'22Mar2018'd,'23Mar2018'd,'24Mar2018'd,'25Mar2018'd,'26Mar2018'd,
			  '13May2018'd, '16Jul2018'd, '18Jul2018'd, 
              '17Nov2017'd,'18Nov2017'd,'19Nov2017'd,'20Nov2017'd,'21Nov2017'd,'22Nov2017'd, '23Nov2017'd,'24Nov2017'd,'27Nov2017'd,
			  '16Nov2018'd,'17Nov2018'd,'18Nov2018'd,'19Nov2018'd,'20Nov2018'd,'21Nov2018'd,'22Nov2018'd, '23Nov2018'd,'26Nov2018'd,
              '03Dec2017'd,'04Dec2017'd,'05Dec2017'd,'06Dec2017'd,'07Dec2017'd,'08Dec2017'd,
              '09Dec2017'd,'10Dec2017'd,'11Dec2017'd,'12Dec2017'd,'13Dec2017'd,'14Dec2017'd,
			  '02Dec2018'd,'03Dec2018'd,'04Dec2018'd,'05Dec2018'd,'06Dec2018'd,'07Dec2018'd,'08Dec2018'd,
              '09Dec2018'd,'10Dec2018'd,'11Dec2018'd,'12Dec2018'd,'13Dec2018'd) then seasonal_sale = 1;
else if time1 >= '20Dec2017'd and time1 <= '31Dec2017'd then seasonal_sale = 1;
else if time1 >= '20Dec2018'd and time1 <= '31Dec2018'd then seasonal_sale = 1;
else seasonal_sale = 0;
run;

proc freq data = finalnumsellers_modeldata1;
tables season*seasonal_sale /list;
run;



PROC EXPORT 
DATA=finalnumsellers_modeldata1
DBMS=csv
OUTFILE= "D:\Amazon Price Dynamics\New Data\GAM + MVRF\Other categories\Luggage\finalnumsellers_modeldata1_JmRnd4_July2025.csv"
REPLACE;
run;


